parser_test.go 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
  2. // SPDX-License-Identifier: Apache-2.0
  3. package parser // import "miniflux.app/v2/internal/reader/parser"
  4. import (
  5. "strings"
  6. "testing"
  7. )
  8. func FuzzParse(f *testing.F) {
  9. f.Add("https://z.org", `<?xml version="1.0" encoding="utf-8"?>
  10. <feed xmlns="http://www.w3.org/2005/Atom">
  11. <title>Example Feed</title>
  12. <link href="http://z.org/"/>
  13. <link href="/k"/>
  14. <updated>2003-12-13T18:30:02Z</updated>
  15. <author><name>John Doe</name></author>
  16. <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
  17. <entry>
  18. <title>a</title>
  19. <link href="http://example.org/b"/>
  20. <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
  21. <updated>2003-12-13T18:30:02Z</updated>
  22. <summary>c</summary>
  23. </entry>
  24. </feed>`)
  25. f.Add("https://z.org", `<?xml version="1.0"?>
  26. <rss version="2.0">
  27. <channel>
  28. <title>a</title>
  29. <link>http://z.org</link>
  30. <item>
  31. <title>a</title>
  32. <link>http://z.org</link>
  33. <description>d</description>
  34. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  35. <guid>l</guid>
  36. </item>
  37. </channel>
  38. </rss>`)
  39. f.Add("https://z.org", `<?xml version="1.0" encoding="utf-8"?>
  40. <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
  41. <channel>
  42. <title>a</title>
  43. <link>http://z.org/</link>
  44. </channel>
  45. <item>
  46. <title>a</title>
  47. <link>/</link>
  48. <description>c</description>
  49. </item>
  50. </rdf:RDF>`)
  51. f.Add("http://z.org", `{
  52. "version": "http://jsonfeed.org/version/1",
  53. "title": "a",
  54. "home_page_url": "http://z.org/",
  55. "feed_url": "http://z.org/a.json",
  56. "items": [
  57. {"id": "2","content_text": "a","url": "https://z.org/2"},
  58. {"id": "1","content_html": "<a","url":"http://z.org/1"}]}`)
  59. f.Fuzz(func(t *testing.T, url string, data string) {
  60. ParseFeed(url, strings.NewReader(data))
  61. })
  62. }
  63. func TestParseAtom(t *testing.T) {
  64. data := `<?xml version="1.0" encoding="utf-8"?>
  65. <feed xmlns="http://www.w3.org/2005/Atom">
  66. <title>Example Feed</title>
  67. <link href="http://example.org/"/>
  68. <updated>2003-12-13T18:30:02Z</updated>
  69. <author>
  70. <name>John Doe</name>
  71. </author>
  72. <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
  73. <entry>
  74. <title>Atom-Powered Robots Run Amok</title>
  75. <link href="http://example.org/2003/12/13/atom03"/>
  76. <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
  77. <updated>2003-12-13T18:30:02Z</updated>
  78. <summary>Some text.</summary>
  79. </entry>
  80. </feed>`
  81. feed, err := ParseFeed("https://example.org/", strings.NewReader(data))
  82. if err != nil {
  83. t.Error(err)
  84. }
  85. if feed.Title != "Example Feed" {
  86. t.Errorf("Incorrect title, got: %s", feed.Title)
  87. }
  88. }
  89. func TestParseAtomFeedWithRelativeURL(t *testing.T) {
  90. data := `<?xml version="1.0" encoding="utf-8"?>
  91. <feed xmlns="http://www.w3.org/2005/Atom">
  92. <title>Example Feed</title>
  93. <link href="/blog/atom.xml" rel="self" type="application/atom+xml"/>
  94. <link href="/blog"/>
  95. <entry>
  96. <title>Test</title>
  97. <link href="/blog/article.html"/>
  98. <link href="/blog/article.html" rel="alternate" type="text/html"/>
  99. <id>/blog/article.html</id>
  100. <updated>2003-12-13T18:30:02Z</updated>
  101. <summary>Some text.</summary>
  102. </entry>
  103. </feed>`
  104. feed, err := ParseFeed("https://example.org/blog/atom.xml", strings.NewReader(data))
  105. if err != nil {
  106. t.Fatal(err)
  107. }
  108. if feed.FeedURL != "https://example.org/blog/atom.xml" {
  109. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  110. }
  111. if feed.SiteURL != "https://example.org/blog" {
  112. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  113. }
  114. if feed.Entries[0].URL != "https://example.org/blog/article.html" {
  115. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  116. }
  117. }
  118. func TestParseRSS(t *testing.T) {
  119. data := `<?xml version="1.0"?>
  120. <rss version="2.0">
  121. <channel>
  122. <title>Liftoff News</title>
  123. <link>http://liftoff.msfc.nasa.gov/</link>
  124. <item>
  125. <title>Star City</title>
  126. <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
  127. <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
  128. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  129. <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
  130. </item>
  131. </channel>
  132. </rss>`
  133. feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", strings.NewReader(data))
  134. if err != nil {
  135. t.Error(err)
  136. }
  137. if feed.Title != "Liftoff News" {
  138. t.Errorf("Incorrect title, got: %s", feed.Title)
  139. }
  140. }
  141. func TestParseRSSFeedWithRelativeURL(t *testing.T) {
  142. data := `<?xml version="1.0"?>
  143. <rss version="2.0">
  144. <channel>
  145. <title>Example Feed</title>
  146. <link>/blog</link>
  147. <item>
  148. <title>Example Entry</title>
  149. <link>/blog/article.html</link>
  150. <description>Something</description>
  151. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  152. <guid>1234</guid>
  153. </item>
  154. </channel>
  155. </rss>`
  156. feed, err := ParseFeed("http://example.org/rss.xml", strings.NewReader(data))
  157. if err != nil {
  158. t.Error(err)
  159. }
  160. if feed.Title != "Example Feed" {
  161. t.Errorf("Incorrect title, got: %s", feed.Title)
  162. }
  163. if feed.FeedURL != "http://example.org/rss.xml" {
  164. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  165. }
  166. if feed.SiteURL != "http://example.org/blog" {
  167. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  168. }
  169. if feed.Entries[0].URL != "http://example.org/blog/article.html" {
  170. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  171. }
  172. }
  173. func TestParseRDF(t *testing.T) {
  174. data := `<?xml version="1.0" encoding="utf-8"?>
  175. <rdf:RDF
  176. xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  177. xmlns="http://purl.org/rss/1.0/"
  178. >
  179. <channel>
  180. <title>RDF Example</title>
  181. <link>http://example.org/</link>
  182. </channel>
  183. <item>
  184. <title>Title</title>
  185. <link>http://example.org/item</link>
  186. <description>Test</description>
  187. </item>
  188. </rdf:RDF>`
  189. feed, err := ParseFeed("http://example.org/", strings.NewReader(data))
  190. if err != nil {
  191. t.Error(err)
  192. }
  193. if feed.Title != "RDF Example" {
  194. t.Errorf("Incorrect title, got: %s", feed.Title)
  195. }
  196. }
  197. func TestParseRDFWithRelativeURL(t *testing.T) {
  198. data := `<?xml version="1.0" encoding="utf-8"?>
  199. <rdf:RDF
  200. xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  201. xmlns="http://purl.org/rss/1.0/"
  202. >
  203. <channel>
  204. <title>RDF Example</title>
  205. <link>/blog</link>
  206. </channel>
  207. <item>
  208. <title>Title</title>
  209. <link>/blog/article.html</link>
  210. <description>Test</description>
  211. </item>
  212. </rdf:RDF>`
  213. feed, err := ParseFeed("http://example.org/rdf.xml", strings.NewReader(data))
  214. if err != nil {
  215. t.Error(err)
  216. }
  217. if feed.FeedURL != "http://example.org/rdf.xml" {
  218. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  219. }
  220. if feed.SiteURL != "http://example.org/blog" {
  221. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  222. }
  223. if feed.Entries[0].URL != "http://example.org/blog/article.html" {
  224. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  225. }
  226. }
  227. func TestParseJson(t *testing.T) {
  228. data := `{
  229. "version": "https://jsonfeed.org/version/1",
  230. "title": "My Example Feed",
  231. "home_page_url": "https://example.org/",
  232. "feed_url": "https://example.org/feed.json",
  233. "items": [
  234. {
  235. "id": "2",
  236. "content_text": "This is a second item.",
  237. "url": "https://example.org/second-item"
  238. },
  239. {
  240. "id": "1",
  241. "content_html": "<p>Hello, world!</p>",
  242. "url": "https://example.org/initial-post"
  243. }
  244. ]
  245. }`
  246. feed, err := ParseFeed("https://example.org/feed.json", strings.NewReader(data))
  247. if err != nil {
  248. t.Error(err)
  249. }
  250. if feed.Title != "My Example Feed" {
  251. t.Errorf("Incorrect title, got: %s", feed.Title)
  252. }
  253. }
  254. func TestParseJsonFeedWithRelativeURL(t *testing.T) {
  255. data := `{
  256. "version": "https://jsonfeed.org/version/1",
  257. "title": "My Example Feed",
  258. "home_page_url": "/blog",
  259. "feed_url": "/blog/feed.json",
  260. "items": [
  261. {
  262. "id": "2",
  263. "content_text": "This is a second item.",
  264. "url": "/blog/article.html"
  265. }
  266. ]
  267. }`
  268. feed, err := ParseFeed("https://example.org/blog/feed.json", strings.NewReader(data))
  269. if err != nil {
  270. t.Error(err)
  271. }
  272. if feed.Title != "My Example Feed" {
  273. t.Errorf("Incorrect title, got: %s", feed.Title)
  274. }
  275. if feed.FeedURL != "https://example.org/blog/feed.json" {
  276. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  277. }
  278. if feed.SiteURL != "https://example.org/blog" {
  279. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  280. }
  281. if feed.Entries[0].URL != "https://example.org/blog/article.html" {
  282. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  283. }
  284. }
  285. func TestParseUnknownFeed(t *testing.T) {
  286. data := `
  287. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  288. <html xmlns="http://www.w3.org/1999/xhtml">
  289. <head>
  290. <title>Title of document</title>
  291. </head>
  292. <body>
  293. some content
  294. </body>
  295. </html>
  296. `
  297. _, err := ParseFeed("https://example.org/", strings.NewReader(data))
  298. if err == nil {
  299. t.Error("ParseFeed must returns an error")
  300. }
  301. }
  302. func TestParseEmptyFeed(t *testing.T) {
  303. _, err := ParseFeed("", strings.NewReader(""))
  304. if err == nil {
  305. t.Error("ParseFeed must returns an error")
  306. }
  307. }