Răsfoiți Sursa

feat(rdf): parse feed and item language from RDF/RSS 1.0 dc:language

Extends language parsing to RDF/RSS 1.0 feeds by reading the
Dublin Core dc:language element at both the channel and item level.
Values are normalized the same way as for RSS 2.0 and Atom.
Bram Duvigneau 1 zi în urmă
părinte
comite
212c3e13d5

+ 2 - 1
internal/reader/dublincore/dublincore.go

@@ -4,7 +4,8 @@
 package dublincore // import "miniflux.app/v2/internal/reader/dublincore"
 
 type DublinCoreChannelElement struct {
-	DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
+	DublinCoreCreator  string `xml:"http://purl.org/dc/elements/1.1/ creator"`
+	DublinCoreLanguage string `xml:"http://purl.org/dc/elements/1.1/ language"`
 }
 
 type DublinCoreItemElement struct {

+ 3 - 0
internal/reader/rdf/adapter.go

@@ -26,6 +26,7 @@ func (r *rdfAdapter) buildFeed(baseURL string) *model.Feed {
 		FeedURL:     strings.TrimSpace(baseURL),
 		SiteURL:     strings.TrimSpace(r.rdf.Channel.Link),
 		Description: strings.TrimSpace(r.rdf.Channel.Description),
+		Language:    model.NormalizeLanguage(r.rdf.Channel.DublinCoreLanguage),
 	}
 
 	if feed.Title == "" {
@@ -100,6 +101,8 @@ func (r *rdfAdapter) buildFeed(baseURL string) *model.Feed {
 			entry.Author = sanitizer.StripTags(r.rdf.Channel.DublinCoreCreator)
 		}
 
+		entry.Language = model.NormalizeLanguage(item.DublinCoreLanguage)
+
 		feed.Entries = append(feed.Entries, entry)
 	}
 

+ 96 - 0
internal/reader/rdf/parser_test.go

@@ -848,3 +848,99 @@ func TestParseFeedWithInvalidCharacterEntity(t *testing.T) {
 		t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
 	}
 }
+
+func TestParseFeedWithChannelLanguage(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns="http://purl.org/rss/1.0/">
+	  <channel>
+			<title>Example Feed</title>
+			<link>http://example.org</link>
+			<dc:language>EN-us</dc:language>
+	  </channel>
+	  <item>
+			<title>Item</title>
+			<link>http://example.org/item</link>
+	  </item>
+	</rdf:RDF>`
+
+	feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Language != "en-us" {
+		t.Errorf(`Incorrect feed language, got: %q`, feed.Language)
+	}
+
+	if len(feed.Entries) != 1 {
+		t.Fatalf(`Unexpected entry count, got: %d`, len(feed.Entries))
+	}
+
+	if feed.Entries[0].Language != "" {
+		t.Errorf(`Expected empty entry language, got: %q`, feed.Entries[0].Language)
+	}
+}
+
+func TestParseFeedWithItemLanguage(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns="http://purl.org/rss/1.0/">
+	  <channel>
+			<title>Example Feed</title>
+			<link>http://example.org</link>
+			<dc:language>en</dc:language>
+	  </channel>
+	  <item>
+			<title>Item</title>
+			<link>http://example.org/item</link>
+			<dc:language>fr_CA</dc:language>
+	  </item>
+	</rdf:RDF>`
+
+	feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Language != "en" {
+		t.Errorf(`Incorrect feed language, got: %q`, feed.Language)
+	}
+
+	if len(feed.Entries) != 1 {
+		t.Fatalf(`Unexpected entry count, got: %d`, len(feed.Entries))
+	}
+
+	if feed.Entries[0].Language != "fr-ca" {
+		t.Errorf(`Incorrect entry language, got: %q`, feed.Entries[0].Language)
+	}
+}
+
+func TestParseFeedWithoutLanguage(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
+	  <channel>
+			<title>Example Feed</title>
+			<link>http://example.org</link>
+	  </channel>
+	  <item>
+			<title>Item</title>
+			<link>http://example.org/item</link>
+	  </item>
+	</rdf:RDF>`
+
+	feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Language != "" {
+		t.Errorf(`Expected empty feed language, got: %q`, feed.Language)
+	}
+
+	if len(feed.Entries) != 1 {
+		t.Fatalf(`Unexpected entry count, got: %d`, len(feed.Entries))
+	}
+
+	if feed.Entries[0].Language != "" {
+		t.Errorf(`Expected empty entry language, got: %q`, feed.Entries[0].Language)
+	}
+}