Просмотр исходного кода

fix(atom): restrict language parsing to namespace-qualified xml:lang

The Language fields added for Atom 1.0 and 0.3 feeds and entries used
the struct tag `xml:"lang,attr"`, which encoding/xml matches against a
lang attribute from any namespace, with the last one in document order
winning. A feed carrying e.g. foo:lang="zz" after xml:lang="fr" would
be stored with language "zz".

Qualify the tags with the XML namespace so only xml:lang matches. As a
side effect, a bare non-standard lang attribute is now ignored instead
of being treated as the feed language.

Also document each Language field and add regression tests covering
foreign-namespace and unqualified lang attributes, plus first coverage
of xml:lang parsing for Atom 0.3.
Fred 1 день назад
Родитель
Коммит
766d298095

+ 10 - 2
internal/reader/atom/atom_03.go

@@ -13,7 +13,11 @@ import (
 type atom03Feed struct {
 	Version string `xml:"version,attr"`
 
-	Language string `xml:"lang,attr"`
+	// Language is the natural language of the feed, declared by an
+	// xml:lang attribute on the atom:feed element. The tag is
+	// namespace-qualified so that lang attributes from other namespaces
+	// cannot override the real xml:lang value.
+	Language string `xml:"http://www.w3.org/XML/1998/namespace lang,attr"`
 
 	// The "atom:id" element's content conveys a permanent, globally unique identifier for the feed.
 	// It MUST NOT change over time, even if the feed is relocated. atom:feed elements MAY contain an atom:id element,
@@ -49,7 +53,11 @@ type atom03Entry struct {
 	// If the same entry is syndicated in two atom:feeds published by the same entity, the entry's atom:id MUST be the same in both feeds.
 	ID string `xml:"id"`
 
-	Language string `xml:"lang,attr"`
+	// Language is the natural language of the entry, declared by an
+	// xml:lang attribute on the atom:entry element. The tag is
+	// namespace-qualified so that lang attributes from other namespaces
+	// cannot override the real xml:lang value.
+	Language string `xml:"http://www.w3.org/XML/1998/namespace lang,attr"`
 
 	// The "atom:title" element is a Content construct that conveys a human-readable title for the entry.
 	// atom:entry elements MUST have exactly one "atom:title" element.

+ 33 - 0
internal/reader/atom/atom_03_test.go

@@ -294,3 +294,36 @@ func TestParseAtom03WithBase64Content(t *testing.T) {
 		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
 	}
 }
+
+func TestParseAtom03WithLanguage(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<feed version="0.3" xmlns="http://purl.org/atom/ns#" xmlns:foo="http://example.org/ns" xml:lang="fr-CA" foo:lang="zz">
+		<title>dive into mark</title>
+		<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
+		<modified>2003-12-13T18:30:02Z</modified>
+		<entry xml:lang="fr-CA" foo:lang="zz">
+			<title>Atom 0.3 snapshot</title>
+			<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
+			<id>tag:diveintomark.org,2003:3.2397</id>
+			<issued>2003-12-13T08:29:29-04:00</issued>
+			<modified>2003-12-13T18:30:02Z</modified>
+		</entry>
+	</feed>`
+
+	feed, err := Parse("http://diveintomark.org/atom.xml", bytes.NewReader([]byte(data)), "0.3")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Language != "fr-ca" {
+		t.Errorf("Incorrect language, got: %q", feed.Language)
+	}
+
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Expected 1 entry, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Language != "fr-ca" {
+		t.Errorf("Incorrect entry language, got: %q", feed.Entries[0].Language)
+	}
+}

+ 10 - 5
internal/reader/atom/atom_10.go

@@ -22,10 +22,11 @@ import (
 type atom10Feed struct {
 	XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
 
-	// xml:lang declares the natural language of the feed and, by
-	// inheritance, of its entries. Persisted on the feed row and emitted
-	// as an HTML lang attribute by the web reader.
-	Language string `xml:"lang,attr"`
+	// Language is the natural language of the feed, declared by an
+	// xml:lang attribute on the atom:feed element. The tag is
+	// namespace-qualified so that lang attributes from other namespaces
+	// cannot override the real xml:lang value.
+	Language string `xml:"http://www.w3.org/XML/1998/namespace lang,attr"`
 
 	// The "atom:id" element conveys a permanent, universally unique
 	// identifier for an entry or feed.
@@ -101,7 +102,11 @@ type atom10Entry struct {
 	// atom:entry elements MUST contain exactly one atom:id element.
 	ID string `xml:"http://www.w3.org/2005/Atom id"`
 
-	Language string `xml:"lang,attr"`
+	// Language is the natural language of the entry, declared by an
+	// xml:lang attribute on the atom:entry element. The tag is
+	// namespace-qualified so that lang attributes from other namespaces
+	// cannot override the real xml:lang value.
+	Language string `xml:"http://www.w3.org/XML/1998/namespace lang,attr"`
 
 	// The "atom:title" element is a Text construct that conveys a human-
 	// readable title for an entry or feed.

+ 52 - 0
internal/reader/atom/atom_10_test.go

@@ -1961,3 +1961,55 @@ func TestParseEntryWithoutLanguage(t *testing.T) {
 		t.Errorf("Expected empty entry language, got: %q", feed.Entries[0].Language)
 	}
 }
+
+func TestParseFeedWithForeignLangAttribute(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<feed xmlns="http://www.w3.org/2005/Atom" xmlns:foo="http://example.org/ns" xml:lang="fr-CA" foo:lang="zz">
+		<title>Example Feed</title>
+		<link href="http://example.org/"/>
+		<updated>2003-12-13T18:30:02Z</updated>
+		<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
+		<entry xml:lang="fr-CA" foo:lang="zz">
+			<title>Bonjour</title>
+			<link href="http://example.org/2003/12/13/bonjour"/>
+			<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+			<updated>2003-12-13T18:30:02Z</updated>
+		</entry>
+	</feed>`
+
+	feed, err := Parse("http://example.org/feed.xml", bytes.NewReader([]byte(data)), "10")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Language != "fr-ca" {
+		t.Errorf("Incorrect language, got: %q", feed.Language)
+	}
+
+	if len(feed.Entries) != 1 {
+		t.Fatalf("Expected 1 entry, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].Language != "fr-ca" {
+		t.Errorf("Incorrect entry language, got: %q", feed.Entries[0].Language)
+	}
+}
+
+func TestParseFeedWithUnqualifiedLangAttribute(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+	<feed xmlns="http://www.w3.org/2005/Atom" lang="de">
+		<title>Example Feed</title>
+		<link href="http://example.org/"/>
+		<updated>2003-12-13T18:30:02Z</updated>
+		<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
+	</feed>`
+
+	feed, err := Parse("http://example.org/feed.xml", bytes.NewReader([]byte(data)), "10")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Language != "" {
+		t.Errorf("Expected empty language for unqualified lang attribute, got: %q", feed.Language)
+	}
+}