parser_test.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571
  1. // Copyright 2017 Frédéric Guillot. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package rss
  5. import (
  6. "bytes"
  7. "testing"
  8. "time"
  9. "github.com/miniflux/miniflux/errors"
  10. )
  11. func TestParseRss2Sample(t *testing.T) {
  12. data := `
  13. <?xml version="1.0"?>
  14. <rss version="2.0">
  15. <channel>
  16. <title>Liftoff News</title>
  17. <link>http://liftoff.msfc.nasa.gov/</link>
  18. <description>Liftoff to Space Exploration.</description>
  19. <language>en-us</language>
  20. <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
  21. <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
  22. <docs>http://blogs.law.harvard.edu/tech/rss</docs>
  23. <generator>Weblog Editor 2.0</generator>
  24. <managingEditor>editor@example.com</managingEditor>
  25. <webMaster>webmaster@example.com</webMaster>
  26. <item>
  27. <title>Star City</title>
  28. <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
  29. <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
  30. <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
  31. <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
  32. </item>
  33. <item>
  34. <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
  35. <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
  36. <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
  37. </item>
  38. <item>
  39. <title>The Engine That Does More</title>
  40. <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
  41. <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
  42. <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
  43. <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
  44. </item>
  45. <item>
  46. <title>Astronauts' Dirty Laundry</title>
  47. <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
  48. <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
  49. <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
  50. <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
  51. </item>
  52. </channel>
  53. </rss>`
  54. feed, err := Parse(bytes.NewBufferString(data))
  55. if err != nil {
  56. t.Error(err)
  57. }
  58. if feed.Title != "Liftoff News" {
  59. t.Errorf("Incorrect title, got: %s", feed.Title)
  60. }
  61. if feed.FeedURL != "" {
  62. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  63. }
  64. if feed.SiteURL != "http://liftoff.msfc.nasa.gov/" {
  65. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  66. }
  67. if len(feed.Entries) != 4 {
  68. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  69. }
  70. expectedDate := time.Date(2003, time.June, 3, 9, 39, 21, 0, time.UTC)
  71. if !feed.Entries[0].Date.Equal(expectedDate) {
  72. t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
  73. }
  74. if feed.Entries[0].Hash != "5b2b4ac2fe1786ddf0fd2da2f1b07f64e691264f41f2db3ea360f31bb6d9152b" {
  75. t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
  76. }
  77. if feed.Entries[0].URL != "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp" {
  78. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  79. }
  80. if feed.Entries[0].Title != "Star City" {
  81. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  82. }
  83. if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.` {
  84. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
  85. }
  86. }
  87. func TestParseFeedWithoutTitle(t *testing.T) {
  88. data := `<?xml version="1.0" encoding="utf-8"?>
  89. <rss version="2.0">
  90. <channel>
  91. <link>https://example.org/</link>
  92. </channel>
  93. </rss>`
  94. feed, err := Parse(bytes.NewBufferString(data))
  95. if err != nil {
  96. t.Error(err)
  97. }
  98. if feed.Title != "https://example.org/" {
  99. t.Errorf("Incorrect feed title, got: %s", feed.Title)
  100. }
  101. }
  102. func TestParseEntryWithoutTitle(t *testing.T) {
  103. data := `<?xml version="1.0" encoding="utf-8"?>
  104. <rss version="2.0">
  105. <channel>
  106. <link>https://example.org/</link>
  107. <item>
  108. <link>https://example.org/item</link>
  109. </item>
  110. </channel>
  111. </rss>`
  112. feed, err := Parse(bytes.NewBufferString(data))
  113. if err != nil {
  114. t.Error(err)
  115. }
  116. if feed.Entries[0].Title != "https://example.org/item" {
  117. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  118. }
  119. }
  120. func TestParseEntryWithoutLink(t *testing.T) {
  121. data := `<?xml version="1.0" encoding="utf-8"?>
  122. <rss version="2.0">
  123. <channel>
  124. <link>https://example.org/</link>
  125. <item>
  126. <guid isPermaLink="false">1234</guid>
  127. </item>
  128. </channel>
  129. </rss>`
  130. feed, err := Parse(bytes.NewBufferString(data))
  131. if err != nil {
  132. t.Error(err)
  133. }
  134. if feed.Entries[0].URL != "https://example.org/" {
  135. t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
  136. }
  137. if feed.Entries[0].Hash != "03ac674216f3e15c761ee1a5e255f067953623c8b388b4459e13f978d7c846f4" {
  138. t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
  139. }
  140. }
  141. func TestParseEntryWithAtomLink(t *testing.T) {
  142. data := `<?xml version="1.0" encoding="utf-8"?>
  143. <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  144. <channel>
  145. <link>https://example.org/</link>
  146. <item>
  147. <title>Test</title>
  148. <atom:link href="https://example.org/item" />
  149. </item>
  150. </channel>
  151. </rss>`
  152. feed, err := Parse(bytes.NewBufferString(data))
  153. if err != nil {
  154. t.Error(err)
  155. }
  156. if feed.Entries[0].URL != "https://example.org/item" {
  157. t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
  158. }
  159. }
  160. func TestParseEntryWithMultipleAtomLinks(t *testing.T) {
  161. data := `<?xml version="1.0" encoding="utf-8"?>
  162. <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  163. <channel>
  164. <link>https://example.org/</link>
  165. <item>
  166. <title>Test</title>
  167. <atom:link rel="payment" href="https://example.org/a" />
  168. <atom:link rel="http://foobar.tld" href="https://example.org/b" />
  169. </item>
  170. </channel>
  171. </rss>`
  172. feed, err := Parse(bytes.NewBufferString(data))
  173. if err != nil {
  174. t.Error(err)
  175. }
  176. if feed.Entries[0].URL != "https://example.org/b" {
  177. t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
  178. }
  179. }
  180. func TestParseFeedURLWithAtomLink(t *testing.T) {
  181. data := `<?xml version="1.0" encoding="utf-8"?>
  182. <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
  183. <channel>
  184. <title>Example</title>
  185. <link>https://example.org/</link>
  186. <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
  187. </channel>
  188. </rss>`
  189. feed, err := Parse(bytes.NewBufferString(data))
  190. if err != nil {
  191. t.Error(err)
  192. }
  193. if feed.FeedURL != "https://example.org/rss" {
  194. t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
  195. }
  196. if feed.SiteURL != "https://example.org/" {
  197. t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
  198. }
  199. }
  200. func TestParseEntryWithAtomAuthor(t *testing.T) {
  201. data := `<?xml version="1.0" encoding="utf-8"?>
  202. <rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
  203. <channel>
  204. <title>Example</title>
  205. <link>https://example.org/</link>
  206. <atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
  207. <item>
  208. <title>Test</title>
  209. <link>https://example.org/item</link>
  210. <author xmlns:author="http://www.w3.org/2005/Atom">
  211. <name>Foo Bar</name>
  212. <title>Vice President</title>
  213. <department/>
  214. <company>FooBar Inc.</company>
  215. </author>
  216. </item>
  217. </channel>
  218. </rss>`
  219. feed, err := Parse(bytes.NewBufferString(data))
  220. if err != nil {
  221. t.Error(err)
  222. }
  223. if feed.Entries[0].Author != "Foo Bar" {
  224. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  225. }
  226. }
  227. func TestParseEntryWithDublinCoreAuthor(t *testing.T) {
  228. data := `<?xml version="1.0" encoding="utf-8"?>
  229. <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  230. <channel>
  231. <title>Example</title>
  232. <link>https://example.org/</link>
  233. <item>
  234. <title>Test</title>
  235. <link>https://example.org/item</link>
  236. <dc:creator>Me (me@example.com)</dc:creator>
  237. </item>
  238. </channel>
  239. </rss>`
  240. feed, err := Parse(bytes.NewBufferString(data))
  241. if err != nil {
  242. t.Error(err)
  243. }
  244. if feed.Entries[0].Author != "Me (me@example.com)" {
  245. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  246. }
  247. }
  248. func TestParseEntryWithItunesAuthor(t *testing.T) {
  249. data := `<?xml version="1.0" encoding="utf-8"?>
  250. <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  251. <channel>
  252. <title>Example</title>
  253. <link>https://example.org/</link>
  254. <item>
  255. <title>Test</title>
  256. <link>https://example.org/item</link>
  257. <itunes:author>Someone</itunes:author>
  258. </item>
  259. </channel>
  260. </rss>`
  261. feed, err := Parse(bytes.NewBufferString(data))
  262. if err != nil {
  263. t.Error(err)
  264. }
  265. if feed.Entries[0].Author != "Someone" {
  266. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  267. }
  268. }
  269. func TestParseFeedWithItunesAuthor(t *testing.T) {
  270. data := `<?xml version="1.0" encoding="utf-8"?>
  271. <rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
  272. <channel>
  273. <title>Example</title>
  274. <link>https://example.org/</link>
  275. <itunes:author>Someone</itunes:author>
  276. <item>
  277. <title>Test</title>
  278. <link>https://example.org/item</link>
  279. </item>
  280. </channel>
  281. </rss>`
  282. feed, err := Parse(bytes.NewBufferString(data))
  283. if err != nil {
  284. t.Error(err)
  285. }
  286. if feed.Entries[0].Author != "Someone" {
  287. t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
  288. }
  289. }
  290. func TestParseEntryWithDublinCoreDate(t *testing.T) {
  291. data := `<?xml version="1.0" encoding="utf-8"?>
  292. <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
  293. <channel>
  294. <title>Example</title>
  295. <link>http://example.org/</link>
  296. <item>
  297. <title>Item 1</title>
  298. <link>http://example.org/item1</link>
  299. <description>Description.</description>
  300. <guid isPermaLink="false">UUID</guid>
  301. <dc:date>2002-09-29T23:40:06-05:00</dc:date>
  302. </item>
  303. </channel>
  304. </rss>`
  305. feed, err := Parse(bytes.NewBufferString(data))
  306. if err != nil {
  307. t.Error(err)
  308. }
  309. location, _ := time.LoadLocation("EST")
  310. expectedDate := time.Date(2002, time.September, 29, 23, 40, 06, 0, location)
  311. if !feed.Entries[0].Date.Equal(expectedDate) {
  312. t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
  313. }
  314. }
  315. func TestParseEntryWithContentEncoded(t *testing.T) {
  316. data := `<?xml version="1.0" encoding="utf-8"?>
  317. <rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  318. <channel>
  319. <title>Example</title>
  320. <link>http://example.org/</link>
  321. <item>
  322. <title>Item 1</title>
  323. <link>http://example.org/item1</link>
  324. <description>Description.</description>
  325. <guid isPermaLink="false">UUID</guid>
  326. <content:encoded><![CDATA[<p><a href="http://www.example.org/">Example</a>.</p>]]></content:encoded>
  327. </item>
  328. </channel>
  329. </rss>`
  330. feed, err := Parse(bytes.NewBufferString(data))
  331. if err != nil {
  332. t.Error(err)
  333. }
  334. if feed.Entries[0].Content != `<p><a href="http://www.example.org/">Example</a>.</p>` {
  335. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
  336. }
  337. }
  338. func TestParseEntryWithFeedBurnerLink(t *testing.T) {
  339. data := `<?xml version="1.0" encoding="utf-8"?>
  340. <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
  341. <channel>
  342. <title>Example</title>
  343. <link>http://example.org/</link>
  344. <item>
  345. <title>Item 1</title>
  346. <link>http://example.org/item1</link>
  347. <feedburner:origLink>http://example.org/original</feedburner:origLink>
  348. </item>
  349. </channel>
  350. </rss>`
  351. feed, err := Parse(bytes.NewBufferString(data))
  352. if err != nil {
  353. t.Error(err)
  354. }
  355. if feed.Entries[0].URL != "http://example.org/original" {
  356. t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].URL)
  357. }
  358. }
  359. func TestParseEntryTitleWithWhitespaces(t *testing.T) {
  360. data := `<?xml version="1.0" encoding="utf-8"?>
  361. <rss version="2.0">
  362. <channel>
  363. <title>Example</title>
  364. <link>http://example.org</link>
  365. <item>
  366. <title>
  367. Some Title
  368. </title>
  369. <link>http://www.example.org/entries/1</link>
  370. <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
  371. </item>
  372. </channel>
  373. </rss>`
  374. feed, err := Parse(bytes.NewBufferString(data))
  375. if err != nil {
  376. t.Error(err)
  377. }
  378. if feed.Entries[0].Title != "Some Title" {
  379. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  380. }
  381. }
  382. func TestParseEntryWithEnclosures(t *testing.T) {
  383. data := `<?xml version="1.0" encoding="utf-8"?>
  384. <rss version="2.0">
  385. <channel>
  386. <title>My Podcast Feed</title>
  387. <link>http://example.org</link>
  388. <author>some.email@example.org</author>
  389. <item>
  390. <title>Podcasting with RSS</title>
  391. <link>http://www.example.org/entries/1</link>
  392. <description>An overview of RSS podcasting</description>
  393. <pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
  394. <guid isPermaLink="true">http://www.example.org/entries/1</guid>
  395. <enclosure url="http://www.example.org/myaudiofile.mp3"
  396. length="12345"
  397. type="audio/mpeg" />
  398. </item>
  399. </channel>
  400. </rss>`
  401. feed, err := Parse(bytes.NewBufferString(data))
  402. if err != nil {
  403. t.Error(err)
  404. }
  405. if len(feed.Entries) != 1 {
  406. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  407. }
  408. if feed.Entries[0].URL != "http://www.example.org/entries/1" {
  409. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  410. }
  411. if len(feed.Entries[0].Enclosures) != 1 {
  412. t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
  413. }
  414. if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
  415. t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
  416. }
  417. if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
  418. t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
  419. }
  420. if feed.Entries[0].Enclosures[0].Size != 12345 {
  421. t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
  422. }
  423. }
  424. func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
  425. data := `<?xml version="1.0" encoding="utf-8"?>
  426. <rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
  427. <channel>
  428. <title>My Example Feed</title>
  429. <link>http://example.org</link>
  430. <author>some.email@example.org</author>
  431. <item>
  432. <title>Example Item</title>
  433. <link>http://www.example.org/entries/1</link>
  434. <enclosure
  435. url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3"
  436. length="76192460"
  437. type="audio/mpeg" />
  438. <feedburner:origEnclosureLink>http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink>
  439. </item>
  440. </channel>
  441. </rss>`
  442. feed, err := Parse(bytes.NewBufferString(data))
  443. if err != nil {
  444. t.Error(err)
  445. }
  446. if len(feed.Entries) != 1 {
  447. t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
  448. }
  449. if feed.Entries[0].URL != "http://www.example.org/entries/1" {
  450. t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
  451. }
  452. if len(feed.Entries[0].Enclosures) != 1 {
  453. t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
  454. }
  455. if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
  456. t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
  457. }
  458. if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
  459. t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
  460. }
  461. if feed.Entries[0].Enclosures[0].Size != 76192460 {
  462. t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
  463. }
  464. }
  465. func TestParseEntryWithRelativeURL(t *testing.T) {
  466. data := `<?xml version="1.0" encoding="utf-8"?>
  467. <rss version="2.0">
  468. <channel>
  469. <link>https://example.org/</link>
  470. <item>
  471. <link>item.html</link>
  472. </item>
  473. </channel>
  474. </rss>`
  475. feed, err := Parse(bytes.NewBufferString(data))
  476. if err != nil {
  477. t.Error(err)
  478. }
  479. if feed.Entries[0].Title != "https://example.org/item.html" {
  480. t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
  481. }
  482. }
  483. func TestParseInvalidXml(t *testing.T) {
  484. data := `garbage`
  485. _, err := Parse(bytes.NewBufferString(data))
  486. if err == nil {
  487. t.Error("Parse should returns an error")
  488. }
  489. if _, ok := err.(errors.LocalizedError); !ok {
  490. t.Error("The error returned must be a LocalizedError")
  491. }
  492. }