parser_test.go 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package parser // import "miniflux.app/v2/internal/reader/parser"
  4. import (
  5. "strings"
  6. "testing"
  7. )
  8. func TestParseAtom(t *testing.T) {
  9. data := `<?xml version="1.0" encoding="utf-8"?>
  10. <feed xmlns="http://www.w3.org/2005/Atom">
  11. <title>Example Feed</title>
  12. <link href="http://example.org/"/>
  13. <updated>2003-12-13T18:30:02Z</updated>
  14. <author>
  15. <name>John Doe</name>
  16. </author>
  17. <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
  18. <entry>
  19. <title>Atom-Powered Robots Run Amok</title>
  20. <link href="http://example.org/2003/12/13/atom03"/>
  21. <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
  22. <updated>2003-12-13T18:30:02Z</updated>
  23. <summary>Some text.</summary>
  24. </entry>
  25. </feed>`
  26. feed, err := ParseFeed("https://example.org/", strings.NewReader(data))
  27. if err != nil {
  28. t.Error(err)
  29. }
  30. if feed.Title != "Example Feed" {
  31. t.Errorf("Incorrect title, got: %s", feed.Title)
  32. }
  33. }
  34. func TestParseAtomFeedWithRelativeURL(t *testing.T) {
  35. data := `<?xml version="1.0" encoding="utf-8"?>
  36. <feed xmlns="http://www.w3.org/2005/Atom">
  37. <title>Example Feed</title>
  38. <link href="/blog/atom.xml" rel="self" type="application/atom+xml"/>
  39. <link href="/blog"/>
  40. <entry>
  41. <title>Test</title>
  42. <link href="/blog/article.html"/>
  43. <link href="/blog/article.html" rel="alternate" type="text/html"/>
  44. <id>/blog/article.html</id>
  45. <updated>2003-12-13T18:30:02Z</updated>
  46. <summary>Some text.</summary>
  47. </entry>
  48. </feed>`
  49. feed, err := ParseFeed("https://example.org/blog/atom.xml", strings.NewReader(data))
  50. if err != nil {
  51. t.Fatal(err)
  52. }
  53. if feed.FeedURL != "https://example.org/blog/atom.xml" {
  54. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  55. }
  56. if feed.SiteURL != "https://example.org/blog" {
  57. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  58. }
  59. if feed.Entries[0].URL != "https://example.org/blog/article.html" {
  60. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  61. }
  62. }
  63. func TestParseRSS(t *testing.T) {
  64. data := `<?xml version="1.0"?>
  65. <rss version="2.0">
  66. <channel>
  67. <title>Liftoff News</title>
  68. <link>http://liftoff.msfc.nasa.gov/</link>
  69. <item>
  70. <title>Star City</title>
  71. <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
  72. <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
  73. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  74. <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
  75. </item>
  76. </channel>
  77. </rss>`
  78. feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", strings.NewReader(data))
  79. if err != nil {
  80. t.Error(err)
  81. }
  82. if feed.Title != "Liftoff News" {
  83. t.Errorf("Incorrect title, got: %s", feed.Title)
  84. }
  85. }
  86. func TestParseRSSFeedWithRelativeURL(t *testing.T) {
  87. data := `<?xml version="1.0"?>
  88. <rss version="2.0">
  89. <channel>
  90. <title>Example Feed</title>
  91. <link>/blog</link>
  92. <item>
  93. <title>Example Entry</title>
  94. <link>/blog/article.html</link>
  95. <description>Something</description>
  96. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  97. <guid>1234</guid>
  98. </item>
  99. </channel>
  100. </rss>`
  101. feed, err := ParseFeed("http://example.org/rss.xml", strings.NewReader(data))
  102. if err != nil {
  103. t.Error(err)
  104. }
  105. if feed.Title != "Example Feed" {
  106. t.Errorf("Incorrect title, got: %s", feed.Title)
  107. }
  108. if feed.FeedURL != "http://example.org/rss.xml" {
  109. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  110. }
  111. if feed.SiteURL != "http://example.org/blog" {
  112. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  113. }
  114. if feed.Entries[0].URL != "http://example.org/blog/article.html" {
  115. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  116. }
  117. }
  118. func TestParseRDF(t *testing.T) {
  119. data := `<?xml version="1.0" encoding="utf-8"?>
  120. <rdf:RDF
  121. xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  122. xmlns="http://purl.org/rss/1.0/"
  123. >
  124. <channel>
  125. <title>RDF Example</title>
  126. <link>http://example.org/</link>
  127. </channel>
  128. <item>
  129. <title>Title</title>
  130. <link>http://example.org/item</link>
  131. <description>Test</description>
  132. </item>
  133. </rdf:RDF>`
  134. feed, err := ParseFeed("http://example.org/", strings.NewReader(data))
  135. if err != nil {
  136. t.Error(err)
  137. }
  138. if feed.Title != "RDF Example" {
  139. t.Errorf("Incorrect title, got: %s", feed.Title)
  140. }
  141. }
  142. func TestParseRDFWithRelativeURL(t *testing.T) {
  143. data := `<?xml version="1.0" encoding="utf-8"?>
  144. <rdf:RDF
  145. xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  146. xmlns="http://purl.org/rss/1.0/"
  147. >
  148. <channel>
  149. <title>RDF Example</title>
  150. <link>/blog</link>
  151. </channel>
  152. <item>
  153. <title>Title</title>
  154. <link>/blog/article.html</link>
  155. <description>Test</description>
  156. </item>
  157. </rdf:RDF>`
  158. feed, err := ParseFeed("http://example.org/rdf.xml", strings.NewReader(data))
  159. if err != nil {
  160. t.Error(err)
  161. }
  162. if feed.FeedURL != "http://example.org/rdf.xml" {
  163. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  164. }
  165. if feed.SiteURL != "http://example.org/blog" {
  166. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  167. }
  168. if feed.Entries[0].URL != "http://example.org/blog/article.html" {
  169. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  170. }
  171. }
  172. func TestParseJson(t *testing.T) {
  173. data := `{
  174. "version": "https://jsonfeed.org/version/1",
  175. "title": "My Example Feed",
  176. "home_page_url": "https://example.org/",
  177. "feed_url": "https://example.org/feed.json",
  178. "items": [
  179. {
  180. "id": "2",
  181. "content_text": "This is a second item.",
  182. "url": "https://example.org/second-item"
  183. },
  184. {
  185. "id": "1",
  186. "content_html": "<p>Hello, world!</p>",
  187. "url": "https://example.org/initial-post"
  188. }
  189. ]
  190. }`
  191. feed, err := ParseFeed("https://example.org/feed.json", strings.NewReader(data))
  192. if err != nil {
  193. t.Error(err)
  194. }
  195. if feed.Title != "My Example Feed" {
  196. t.Errorf("Incorrect title, got: %s", feed.Title)
  197. }
  198. }
  199. func TestParseJsonFeedWithRelativeURL(t *testing.T) {
  200. data := `{
  201. "version": "https://jsonfeed.org/version/1",
  202. "title": "My Example Feed",
  203. "home_page_url": "/blog",
  204. "feed_url": "/blog/feed.json",
  205. "items": [
  206. {
  207. "id": "2",
  208. "content_text": "This is a second item.",
  209. "url": "/blog/article.html"
  210. }
  211. ]
  212. }`
  213. feed, err := ParseFeed("https://example.org/blog/feed.json", strings.NewReader(data))
  214. if err != nil {
  215. t.Error(err)
  216. }
  217. if feed.Title != "My Example Feed" {
  218. t.Errorf("Incorrect title, got: %s", feed.Title)
  219. }
  220. if feed.FeedURL != "https://example.org/blog/feed.json" {
  221. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  222. }
  223. if feed.SiteURL != "https://example.org/blog" {
  224. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  225. }
  226. if feed.Entries[0].URL != "https://example.org/blog/article.html" {
  227. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  228. }
  229. }
  230. func TestParseUnknownFeed(t *testing.T) {
  231. data := `
  232. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  233. <html xmlns="http://www.w3.org/1999/xhtml">
  234. <head>
  235. <title>Title of document</title>
  236. </head>
  237. <body>
  238. some content
  239. </body>
  240. </html>
  241. `
  242. _, err := ParseFeed("https://example.org/", strings.NewReader(data))
  243. if err == nil {
  244. t.Error("ParseFeed must returns an error")
  245. }
  246. }
  247. func TestParseEmptyFeed(t *testing.T) {
  248. _, err := ParseFeed("", strings.NewReader(""))
  249. if err == nil {
  250. t.Error("ParseFeed must returns an error")
  251. }
  252. }