parser_test.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package parser // import "miniflux.app/reader/parser"
  5. import (
  6. "bytes"
  7. "io/ioutil"
  8. "testing"
  9. "miniflux.app/http/client"
  10. )
  11. func TestParseAtom(t *testing.T) {
  12. data := `<?xml version="1.0" encoding="utf-8"?>
  13. <feed xmlns="http://www.w3.org/2005/Atom">
  14. <title>Example Feed</title>
  15. <link href="http://example.org/"/>
  16. <updated>2003-12-13T18:30:02Z</updated>
  17. <author>
  18. <name>John Doe</name>
  19. </author>
  20. <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
  21. <entry>
  22. <title>Atom-Powered Robots Run Amok</title>
  23. <link href="http://example.org/2003/12/13/atom03"/>
  24. <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
  25. <updated>2003-12-13T18:30:02Z</updated>
  26. <summary>Some text.</summary>
  27. </entry>
  28. </feed>`
  29. feed, err := ParseFeed(data)
  30. if err != nil {
  31. t.Error(err)
  32. }
  33. if feed.Title != "Example Feed" {
  34. t.Errorf("Incorrect title, got: %s", feed.Title)
  35. }
  36. }
  37. func TestParseRSS(t *testing.T) {
  38. data := `<?xml version="1.0"?>
  39. <rss version="2.0">
  40. <channel>
  41. <title>Liftoff News</title>
  42. <link>http://liftoff.msfc.nasa.gov/</link>
  43. <item>
  44. <title>Star City</title>
  45. <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
  46. <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
  47. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  48. <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
  49. </item>
  50. </channel>
  51. </rss>`
  52. feed, err := ParseFeed(data)
  53. if err != nil {
  54. t.Error(err)
  55. }
  56. if feed.Title != "Liftoff News" {
  57. t.Errorf("Incorrect title, got: %s", feed.Title)
  58. }
  59. }
  60. func TestParseRDF(t *testing.T) {
  61. data := `<?xml version="1.0" encoding="utf-8"?>
  62. <rdf:RDF
  63. xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  64. xmlns="http://purl.org/rss/1.0/"
  65. >
  66. <channel>
  67. <title>RDF Example</title>
  68. <link>http://example.org/</link>
  69. </channel>
  70. <item>
  71. <title>Title</title>
  72. <link>http://example.org/item</link>
  73. <description>Test</description>
  74. </item>
  75. </rdf:RDF>`
  76. feed, err := ParseFeed(data)
  77. if err != nil {
  78. t.Error(err)
  79. }
  80. if feed.Title != "RDF Example" {
  81. t.Errorf("Incorrect title, got: %s", feed.Title)
  82. }
  83. }
  84. func TestParseJson(t *testing.T) {
  85. data := `{
  86. "version": "https://jsonfeed.org/version/1",
  87. "title": "My Example Feed",
  88. "home_page_url": "https://example.org/",
  89. "feed_url": "https://example.org/feed.json",
  90. "items": [
  91. {
  92. "id": "2",
  93. "content_text": "This is a second item.",
  94. "url": "https://example.org/second-item"
  95. },
  96. {
  97. "id": "1",
  98. "content_html": "<p>Hello, world!</p>",
  99. "url": "https://example.org/initial-post"
  100. }
  101. ]
  102. }`
  103. feed, err := ParseFeed(data)
  104. if err != nil {
  105. t.Error(err)
  106. }
  107. if feed.Title != "My Example Feed" {
  108. t.Errorf("Incorrect title, got: %s", feed.Title)
  109. }
  110. }
  111. func TestParseUnknownFeed(t *testing.T) {
  112. data := `
  113. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  114. <html xmlns="http://www.w3.org/1999/xhtml">
  115. <head>
  116. <title>Title of document</title>
  117. </head>
  118. <body>
  119. some content
  120. </body>
  121. </html>
  122. `
  123. _, err := ParseFeed(data)
  124. if err == nil {
  125. t.Error("ParseFeed must returns an error")
  126. }
  127. }
  128. func TestParseEmptyFeed(t *testing.T) {
  129. _, err := ParseFeed("")
  130. if err == nil {
  131. t.Error("ParseFeed must returns an error")
  132. }
  133. }
  134. func TestDifferentEncodingWithResponse(t *testing.T) {
  135. var unicodeTestCases = []struct {
  136. filename, contentType string
  137. index int
  138. title string
  139. }{
  140. // Arabic language encoded in UTF-8.
  141. {"urdu_UTF8.xml", "text/xml; charset=utf-8", 0, "امریکی عسکری امداد کی بندش کی وجوہات: انڈیا سے جنگ، جوہری پروگرام اور اب دہشت گردوں کی پشت پناہی"},
  142. // Windows-1251 encoding and not charset in HTTP header.
  143. {"encoding_WINDOWS-1251.xml", "text/xml", 0, "Цитата #17703"},
  144. // No encoding in XML, but defined in HTTP Content-Type header.
  145. {"no_encoding_ISO-8859-1.xml", "application/xml; charset=ISO-8859-1", 2, "La criminalité liée surtout à... l'ennui ?"},
  146. // ISO-8859-1 encoding defined in XML and HTTP header.
  147. {"encoding_ISO-8859-1.xml", "application/rss+xml; charset=ISO-8859-1", 5, "Projekt Jedi: Microsoft will weiter mit US-Militär zusammenarbeiten"},
  148. // UTF-8 encoding defined in RDF document and HTTP header.
  149. {"rdf_UTF8.xml", "application/rss+xml; charset=utf-8", 1, "Mega-Deal: IBM übernimmt Red Hat"},
  150. // UTF-8 encoding defined only in RDF document.
  151. {"rdf_UTF8.xml", "application/rss+xml", 1, "Mega-Deal: IBM übernimmt Red Hat"},
  152. }
  153. for _, tc := range unicodeTestCases {
  154. content, err := ioutil.ReadFile("testdata/" + tc.filename)
  155. if err != nil {
  156. t.Fatalf(`Unable to read file %q: %v`, tc.filename, err)
  157. }
  158. r := &client.Response{Body: bytes.NewReader(content), ContentType: tc.contentType}
  159. r.EnsureUnicodeBody()
  160. feed, parseErr := ParseFeed(r.String())
  161. if parseErr != nil {
  162. t.Fatalf(`Parsing error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
  163. }
  164. if feed.Entries[tc.index].Title != tc.title {
  165. t.Errorf(`Unexpected title, got %q instead of %q`, feed.Entries[tc.index].Title, tc.title)
  166. }
  167. }
  168. }