parser_test.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package parser // import "miniflux.app/v2/internal/reader/parser"
  4. import (
  5. "testing"
  6. )
  7. func TestParseAtom(t *testing.T) {
  8. data := `<?xml version="1.0" encoding="utf-8"?>
  9. <feed xmlns="http://www.w3.org/2005/Atom">
  10. <title>Example Feed</title>
  11. <link href="http://example.org/"/>
  12. <updated>2003-12-13T18:30:02Z</updated>
  13. <author>
  14. <name>John Doe</name>
  15. </author>
  16. <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
  17. <entry>
  18. <title>Atom-Powered Robots Run Amok</title>
  19. <link href="http://example.org/2003/12/13/atom03"/>
  20. <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
  21. <updated>2003-12-13T18:30:02Z</updated>
  22. <summary>Some text.</summary>
  23. </entry>
  24. </feed>`
  25. feed, err := ParseFeed("https://example.org/", data)
  26. if err != nil {
  27. t.Error(err)
  28. }
  29. if feed.Title != "Example Feed" {
  30. t.Errorf("Incorrect title, got: %s", feed.Title)
  31. }
  32. }
  33. func TestParseAtomFeedWithRelativeURL(t *testing.T) {
  34. data := `<?xml version="1.0" encoding="utf-8"?>
  35. <feed xmlns="http://www.w3.org/2005/Atom">
  36. <title>Example Feed</title>
  37. <link href="/blog/atom.xml" rel="self" type="application/atom+xml"/>
  38. <link href="/blog"/>
  39. <entry>
  40. <title>Test</title>
  41. <link href="/blog/article.html"/>
  42. <link href="/blog/article.html" rel="alternate" type="text/html"/>
  43. <id>/blog/article.html</id>
  44. <updated>2003-12-13T18:30:02Z</updated>
  45. <summary>Some text.</summary>
  46. </entry>
  47. </feed>`
  48. feed, err := ParseFeed("https://example.org/blog/atom.xml", data)
  49. if err != nil {
  50. t.Fatal(err)
  51. }
  52. if feed.FeedURL != "https://example.org/blog/atom.xml" {
  53. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  54. }
  55. if feed.SiteURL != "https://example.org/blog" {
  56. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  57. }
  58. if feed.Entries[0].URL != "https://example.org/blog/article.html" {
  59. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  60. }
  61. }
  62. func TestParseRSS(t *testing.T) {
  63. data := `<?xml version="1.0"?>
  64. <rss version="2.0">
  65. <channel>
  66. <title>Liftoff News</title>
  67. <link>http://liftoff.msfc.nasa.gov/</link>
  68. <item>
  69. <title>Star City</title>
  70. <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
  71. <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
  72. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  73. <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
  74. </item>
  75. </channel>
  76. </rss>`
  77. feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", data)
  78. if err != nil {
  79. t.Error(err)
  80. }
  81. if feed.Title != "Liftoff News" {
  82. t.Errorf("Incorrect title, got: %s", feed.Title)
  83. }
  84. }
  85. func TestParseRSSFeedWithRelativeURL(t *testing.T) {
  86. data := `<?xml version="1.0"?>
  87. <rss version="2.0">
  88. <channel>
  89. <title>Example Feed</title>
  90. <link>/blog</link>
  91. <item>
  92. <title>Example Entry</title>
  93. <link>/blog/article.html</link>
  94. <description>Something</description>
  95. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  96. <guid>1234</guid>
  97. </item>
  98. </channel>
  99. </rss>`
  100. feed, err := ParseFeed("http://example.org/rss.xml", data)
  101. if err != nil {
  102. t.Error(err)
  103. }
  104. if feed.Title != "Example Feed" {
  105. t.Errorf("Incorrect title, got: %s", feed.Title)
  106. }
  107. if feed.FeedURL != "http://example.org/rss.xml" {
  108. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  109. }
  110. if feed.SiteURL != "http://example.org/blog" {
  111. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  112. }
  113. if feed.Entries[0].URL != "http://example.org/blog/article.html" {
  114. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  115. }
  116. }
  117. func TestParseRDF(t *testing.T) {
  118. data := `<?xml version="1.0" encoding="utf-8"?>
  119. <rdf:RDF
  120. xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  121. xmlns="http://purl.org/rss/1.0/"
  122. >
  123. <channel>
  124. <title>RDF Example</title>
  125. <link>http://example.org/</link>
  126. </channel>
  127. <item>
  128. <title>Title</title>
  129. <link>http://example.org/item</link>
  130. <description>Test</description>
  131. </item>
  132. </rdf:RDF>`
  133. feed, err := ParseFeed("http://example.org/", data)
  134. if err != nil {
  135. t.Error(err)
  136. }
  137. if feed.Title != "RDF Example" {
  138. t.Errorf("Incorrect title, got: %s", feed.Title)
  139. }
  140. }
  141. func TestParseRDFWithRelativeURL(t *testing.T) {
  142. data := `<?xml version="1.0" encoding="utf-8"?>
  143. <rdf:RDF
  144. xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  145. xmlns="http://purl.org/rss/1.0/"
  146. >
  147. <channel>
  148. <title>RDF Example</title>
  149. <link>/blog</link>
  150. </channel>
  151. <item>
  152. <title>Title</title>
  153. <link>/blog/article.html</link>
  154. <description>Test</description>
  155. </item>
  156. </rdf:RDF>`
  157. feed, err := ParseFeed("http://example.org/rdf.xml", data)
  158. if err != nil {
  159. t.Error(err)
  160. }
  161. if feed.FeedURL != "http://example.org/rdf.xml" {
  162. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  163. }
  164. if feed.SiteURL != "http://example.org/blog" {
  165. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  166. }
  167. if feed.Entries[0].URL != "http://example.org/blog/article.html" {
  168. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  169. }
  170. }
  171. func TestParseJson(t *testing.T) {
  172. data := `{
  173. "version": "https://jsonfeed.org/version/1",
  174. "title": "My Example Feed",
  175. "home_page_url": "https://example.org/",
  176. "feed_url": "https://example.org/feed.json",
  177. "items": [
  178. {
  179. "id": "2",
  180. "content_text": "This is a second item.",
  181. "url": "https://example.org/second-item"
  182. },
  183. {
  184. "id": "1",
  185. "content_html": "<p>Hello, world!</p>",
  186. "url": "https://example.org/initial-post"
  187. }
  188. ]
  189. }`
  190. feed, err := ParseFeed("https://example.org/feed.json", data)
  191. if err != nil {
  192. t.Error(err)
  193. }
  194. if feed.Title != "My Example Feed" {
  195. t.Errorf("Incorrect title, got: %s", feed.Title)
  196. }
  197. }
  198. func TestParseJsonFeedWithRelativeURL(t *testing.T) {
  199. data := `{
  200. "version": "https://jsonfeed.org/version/1",
  201. "title": "My Example Feed",
  202. "home_page_url": "/blog",
  203. "feed_url": "/blog/feed.json",
  204. "items": [
  205. {
  206. "id": "2",
  207. "content_text": "This is a second item.",
  208. "url": "/blog/article.html"
  209. }
  210. ]
  211. }`
  212. feed, err := ParseFeed("https://example.org/blog/feed.json", data)
  213. if err != nil {
  214. t.Error(err)
  215. }
  216. if feed.Title != "My Example Feed" {
  217. t.Errorf("Incorrect title, got: %s", feed.Title)
  218. }
  219. if feed.FeedURL != "https://example.org/blog/feed.json" {
  220. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  221. }
  222. if feed.SiteURL != "https://example.org/blog" {
  223. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  224. }
  225. if feed.Entries[0].URL != "https://example.org/blog/article.html" {
  226. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  227. }
  228. }
  229. func TestParseUnknownFeed(t *testing.T) {
  230. data := `
  231. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  232. <html xmlns="http://www.w3.org/1999/xhtml">
  233. <head>
  234. <title>Title of document</title>
  235. </head>
  236. <body>
  237. some content
  238. </body>
  239. </html>
  240. `
  241. _, err := ParseFeed("https://example.org/", data)
  242. if err == nil {
  243. t.Error("ParseFeed must returns an error")
  244. }
  245. }
  246. func TestParseEmptyFeed(t *testing.T) {
  247. _, err := ParseFeed("", "")
  248. if err == nil {
  249. t.Error("ParseFeed must returns an error")
  250. }
  251. }